-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[Clang][AArch64] Fix feature guards for SVE2p1 builtins available in SME{2}. #147086
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Clang][AArch64] Fix feature guards for SVE2p1 builtins available in SME{2}. #147086
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: Paul Walker (paulwalker-arm) ChangesBuiltins that are enabled via +sve2p1 in non-streaming mode and +sme{2} in streaming mode should also be enabled via +sve+sme{2} in non-streaming mode and +sme+sve2p1 in streaming mode. Patch is 55.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147086.diff 21 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 76fd072a41d8b..ac6f89f9afdbc 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -264,22 +264,22 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
def SVLD1RQ_BF : SInst<"svld1rq[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1rq", [VerifyRuntimeMode]>;
}
-multiclass StructLoad<string name, string proto, string i> {
- def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, [IsStructLoad, VerifyRuntimeMode]>;
+multiclass StructLoad<string name, string proto, string i, list<FlagType> f = []> {
+ def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, !listconcat(f, [IsStructLoad])>;
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
- def: SInst<name, proto, "b", MergeNone, i, [IsStructLoad, VerifyRuntimeMode]>;
+ def: SInst<name, proto, "b", MergeNone, i, !listconcat(f, [IsStructLoad])>;
}
}
// Load N-element structure into N vectors (scalar base)
-defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2_sret">;
-defm SVLD3 : StructLoad<"svld3[_{2}]", "3Pc", "aarch64_sve_ld3_sret">;
-defm SVLD4 : StructLoad<"svld4[_{2}]", "4Pc", "aarch64_sve_ld4_sret">;
+defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2_sret", [VerifyRuntimeMode]>;
+defm SVLD3 : StructLoad<"svld3[_{2}]", "3Pc", "aarch64_sve_ld3_sret", [VerifyRuntimeMode]>;
+defm SVLD4 : StructLoad<"svld4[_{2}]", "4Pc", "aarch64_sve_ld4_sret", [VerifyRuntimeMode]>;
// Load N-element structure into N vectors (scalar base, VL displacement)
-defm SVLD2_VNUM : StructLoad<"svld2_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2_sret">;
-defm SVLD3_VNUM : StructLoad<"svld3_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3_sret">;
-defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret">;
+defm SVLD2_VNUM : StructLoad<"svld2_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2_sret", [VerifyRuntimeMode]>;
+defm SVLD3_VNUM : StructLoad<"svld3_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3_sret", [VerifyRuntimeMode]>;
+defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret", [VerifyRuntimeMode]>;
// Load one octoword and replicate (scalar base)
let SVETargetGuard = "sve,f64mm", SMETargetGuard = InvalidMode in {
@@ -434,21 +434,21 @@ def SVST1H_SCATTER_INDEX_S : MInst<"svst1h_scatter[_{2}base]_index[_{d}]", "v
def SVST1W_SCATTER_INDEX_S : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "vPuld", "lUl", [IsScatterStore], MemEltTyInt32, "aarch64_sve_st1_scatter_scalar_offset">;
} // let SVETargetGuard = "sve"
-multiclass StructStore<string name, string proto, string i> {
- def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
+multiclass StructStore<string name, string proto, string i, list<FlagType> f = []> {
+ def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, !listconcat(f, [IsStructStore])>;
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
- def: SInst<name, proto, "b", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
+ def: SInst<name, proto, "b", MergeNone, i, !listconcat(f, [IsStructStore])>;
}
}
// Store N vectors into N-element structure (scalar base)
-defm SVST2 : StructStore<"svst2[_{d}]", "vPp2", "aarch64_sve_st2">;
-defm SVST3 : StructStore<"svst3[_{d}]", "vPp3", "aarch64_sve_st3">;
-defm SVST4 : StructStore<"svst4[_{d}]", "vPp4", "aarch64_sve_st4">;
+defm SVST2 : StructStore<"svst2[_{d}]", "vPp2", "aarch64_sve_st2", [VerifyRuntimeMode]>;
+defm SVST3 : StructStore<"svst3[_{d}]", "vPp3", "aarch64_sve_st3", [VerifyRuntimeMode]>;
+defm SVST4 : StructStore<"svst4[_{d}]", "vPp4", "aarch64_sve_st4", [VerifyRuntimeMode]>;
// Store N vectors into N-element structure (scalar base, VL displacement)
-defm SVST2_VNUM : StructStore<"svst2_vnum[_{d}]", "vPpl2", "aarch64_sve_st2">;
-defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3">;
-defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4">;
+defm SVST2_VNUM : StructStore<"svst2_vnum[_{d}]", "vPpl2", "aarch64_sve_st2", [VerifyRuntimeMode]>;
+defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3", [VerifyRuntimeMode]>;
+defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4", [VerifyRuntimeMode]>;
// Store one vector, with no truncation, non-temporal (scalar base)
def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">;
@@ -1325,7 +1325,7 @@ def SVCREATE_3_BF16 : SInst<"svcreate3[_{d}]", "3ddd", "b", MergeNone, "", [IsT
def SVCREATE_4_BF16 : SInst<"svcreate4[_{d}]", "4dddd", "b", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
def SVCREATE_2_B : SInst<"svcreate2[_b]", "2dd", "Pc", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
def SVCREATE_4_B : SInst<"svcreate4[_b]", "4dddd", "Pc", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
}
@@ -1350,18 +1350,17 @@ def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "", [IsTupleSet
def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
def SVGET_2_B : SInst<"svget2[_b]", "d2i", "Pc", MergeNone, "", [IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>;
def SVGET_4_B : SInst<"svget4[_b]", "d4i", "Pc", MergeNone, "", [IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>;
def SVSET_2_B : SInst<"svset2[_b]", "22id", "Pc", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>;
def SVSET_4_B : SInst<"svset4[_b]", "44id", "Pc", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>;
-}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
def SVUNDEF_2_B: Inst<"svundef2_b", "2", "Pc", MergeNone, "", [IsUndef, VerifyRuntimeMode], []>;
def SVUNDEF_4_B: Inst<"svundef4_b", "4", "Pc", MergeNone, "", [IsUndef, VerifyRuntimeMode], []>;
}
+
////////////////////////////////////////////////////////////////////////////////
// SVE2 WhileGE/GT
let SVETargetGuard = "sve2", SMETargetGuard = "sme" in {
@@ -1375,7 +1374,7 @@ def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PcPsPiPl", MergeNone, "
def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PcPsPiPl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
def SVWHILEGE_S64_X2 : SInst<"svwhilege_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege_x2", [VerifyRuntimeMode]>;
def SVWHILEGT_S64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt_x2", [VerifyRuntimeMode]>;
def SVWHILEHI_U64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilehi_x2", [VerifyRuntimeMode]>;
@@ -1384,7 +1383,6 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
def SVWHILELT_S64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt_x2", [VerifyRuntimeMode]>;
def SVWHILELO_U64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilelo_x2", [VerifyRuntimeMode]>;
def SVWHILELS_U64_X2 : SInst<"svwhilele_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilels_x2", [VerifyRuntimeMode]>;
-
}
////////////////////////////////////////////////////////////////////////////////
@@ -1999,7 +1997,7 @@ def SVBGRP : SInst<"svbgrp[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sv
def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_bgrp_x", [VerifyRuntimeMode]>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme" in {
+let SVETargetGuard = "sve2p1|sme", SMETargetGuard = "sve2p1|sme" in {
def SVPSEL_B : SInst<"svpsel_lane_b8", "PPPm", "Pc", MergeNone, "", [VerifyRuntimeMode], []>;
def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [VerifyRuntimeMode], []>;
def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [VerifyRuntimeMode], []>;
@@ -2093,16 +2091,18 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
defm STNT1 : MultiVecStore<"stnt1">;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
def SVDOT_X2_S : SInst<"svdot[_{d}_{2}]", "ddhh", "i", MergeNone, "aarch64_sve_sdot_x2", [VerifyRuntimeMode], []>;
def SVDOT_X2_U : SInst<"svdot[_{d}_{2}]", "ddhh", "Ui", MergeNone, "aarch64_sve_udot_x2", [VerifyRuntimeMode], []>;
def SVDOT_X2_F : SInst<"svdot[_{d}_{2}]", "ddhh", "f", MergeNone, "aarch64_sve_fdot_x2", [VerifyRuntimeMode], []>;
def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_3>]>;
def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_3>]>;
def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_3>]>;
+
+def SVFCLAMP : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [VerifyRuntimeMode], []>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme" in {
+let SVETargetGuard = "sve2p1|sme", SMETargetGuard = "sve2p1|sme" in {
def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp", [VerifyRuntimeMode], []>;
def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [VerifyRuntimeMode], []>;
@@ -2114,7 +2114,6 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone, VerifyRuntimeMode]>;
- def SVFCLAMP : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [VerifyRuntimeMode], []>;
def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
}
@@ -2271,7 +2270,9 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,faminmax" in {
let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
def REINTERPRET_SVBOOL_TO_SVCOUNT : Inst<"svreinterpret[_c]", "}P", "Pc", MergeNone, "", [VerifyRuntimeMode], []>;
def REINTERPRET_SVCOUNT_TO_SVBOOL : Inst<"svreinterpret[_b]", "P}", "Pc", MergeNone, "", [VerifyRuntimeMode], []>;
+}
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
// SQRSHRN / UQRSHRN
def SVQRSHRN_X2 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "h2i", "i", MergeNone, "aarch64_sve_sqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>;
def SVUQRSHRN_X2 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "e2i", "Ui", MergeNone, "aarch64_sve_uqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>;
@@ -2383,7 +2384,7 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
//
// Multi-vector saturating extract narrow and interleave
//
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
def SVQCVTN_S16_S32_X2 : SInst<"svqcvtn_s16[_{d}_x2]", "h2.d", "i", MergeNone, "aarch64_sve_sqcvtn_x2", [VerifyRuntimeMode], []>;
def SVQCVTN_U16_U32_X2 : SInst<"svqcvtn_u16[_{d}_x2]", "e2.d", "Ui", MergeNone, "aarch64_sve_uqcvtn_x2", [VerifyRuntimeMode], []>;
def SVQCVTN_U16_S32_X2 : SInst<"svqcvtn_u16[_{d}_x2]", "e2.d", "i", MergeNone, "aarch64_sve_sqcvtun_x2", [VerifyRuntimeMode], []>;
@@ -2448,7 +2449,7 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in {
def SVFCVTN_X4 : Inst<"svcvtn_mf8[_{d}_x4]", "~4>", "f", MergeNone, "aarch64_sve_fp8_cvtn_x4", [IsOverloadNone, IsStreaming], []>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
// == BFloat16 multiply-subtract ==
def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone, VerifyRuntimeMode], []>;
def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone, VerifyRuntimeMode], []>;
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 76318a068ce6a..bd603a925d15e 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -569,34 +569,39 @@ static bool checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
// * When compiling for SVE only, the caller must be in non-streaming mode.
// * When compiling for both SVE and SME, the caller can be in either mode.
if (BuiltinType == SemaARM::VerifyRuntimeMode) {
- llvm::StringMap<bool> CallerFeatureMapWithoutSVE;
- S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSVE, FD);
- CallerFeatureMapWithoutSVE["sve"] = false;
+ llvm::StringMap<bool> CallerFeatures;
+ S.Context.getFunctionFeatureMap(CallerFeatures, FD);
// Avoid emitting diagnostics for a function that can never compile.
- if (FnType == SemaARM::ArmStreaming && !CallerFeatureMapWithoutSVE["sme"])
+ if (FnType == SemaARM::ArmStreaming && !CallerFeatures["sme"])
return false;
- llvm::StringMap<bool> CallerFeatureMapWithoutSME;
- S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSME, FD);
- CallerFeatureMapWithoutSME["sme"] = false;
+ const auto FindTopLevelPipe = [](const char *S) {
+ unsigned Depth = 0;
+ unsigned I = 0, E = strlen(S);
+ for (; I < E; ++I) {
+ if (S[I] == '|' && Depth == 0)
+ break;
+ if (S[I] == '(')
+ ++Depth;
+ else if (S[I] == ')')
+ --Depth;
+ }
+ return I;
+ };
+
+ const char *RequiredFeatures =
+ S.Context.BuiltinInfo.getRequiredFeatures(BuiltinID);
+ unsigned PipeIdx = FindTopLevelPipe(RequiredFeatures);
+ assert(PipeIdx != 0 && PipeIdx != strlen(RequiredFeatures) &&
+ "Expected feature string of the form 'SVE-EXPR|SME-EXPR'");
+ StringRef NonStreamingBuiltinGuard = StringRef(RequiredFeatures, PipeIdx);
+ StringRef StreamingBuiltinGuard = StringRef(RequiredFeatures + PipeIdx + 1);
- // We know the builtin requires either some combination of SVE flags, or
- // some combination of SME flags, but we need to figure out which part
- // of the required features is satisfied by the target features.
- //
- // For a builtin with target guard 'sve2p1|sme2', if we compile with
- // '+sve2p1,+sme', then we know that it satisfies the 'sve2p1' part if we
- // evaluate the features for '+sve2p1,+sme,+nosme'.
- //
- // Similarly, if we compile with '+sve2,+sme2', then we know it satisfies
- // the 'sme2' part if we evaluate the features for '+sve2,+sme2,+nosve'.
- StringRef BuiltinTargetGuards(
- S.Context.BuiltinInfo.getRequiredFeatures(BuiltinID));
bool SatisfiesSVE = Builtin::evaluateRequiredTargetFeatures(
- BuiltinTargetGuards, CallerFeatureMapWithoutSME);
+ NonStreamingBuiltinGuard, CallerFeatures);
bool SatisfiesSME = Builtin::evaluateRequiredTargetFeatures(
- BuiltinTargetGuards, CallerFeatureMapWithoutSVE);
+ StreamingBuiltinGuard, CallerFeatures);
if ((SatisfiesSVE && SatisfiesSME) ||
(SatisfiesSVE && FnType == SemaARM::ArmStreamingCompatible))
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c
index d947cf78660ff..2143f27f95e45 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c
@@ -1,7 +1,9 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \
-// RUN: -target-feature +bf16 -target-feature +sme -target-feature +sme2 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: -target-feature +bf16 -target-feature +sme -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \
+// RUN: -target-feature +bf16 -target-feature +sve -target-feature +sme -O1 -Werror -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu \
// RUN: -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \
@@ -9,10 +11,12 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu \
// RUN: -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s
#include <arm_sve.h>
-#if defined __ARM_FEATURE_SME
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define MODE_ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
#define MODE_ATTR __arm_streaming
#else
#define MODE_ATTR
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmlsl.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
index 97c458fd01b7f..ad1128fd2b896 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
@@ -3,6 +3,8 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
@@ -12,7 +14,9 @@
#include <arm_sve.h>
-#ifdef __ARM_FEATURE_SME
+#if define...
[truncated]
|
@llvm/pr-subscribers-clang Author: Paul Walker (paulwalker-arm) ChangesBuiltins that are enabled via +sve2p1 in non-streaming mode and +sme{2} in streaming mode should also be enabled via +sve+sme{2} in non-streaming mode and +sme+sve2p1 in streaming mode. Patch is 55.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147086.diff 21 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 76fd072a41d8b..ac6f89f9afdbc 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -264,22 +264,22 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
def SVLD1RQ_BF : SInst<"svld1rq[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1rq", [VerifyRuntimeMode]>;
}
-multiclass StructLoad<string name, string proto, string i> {
- def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, [IsStructLoad, VerifyRuntimeMode]>;
+multiclass StructLoad<string name, string proto, string i, list<FlagType> f = []> {
+ def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, !listconcat(f, [IsStructLoad])>;
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
- def: SInst<name, proto, "b", MergeNone, i, [IsStructLoad, VerifyRuntimeMode]>;
+ def: SInst<name, proto, "b", MergeNone, i, !listconcat(f, [IsStructLoad])>;
}
}
// Load N-element structure into N vectors (scalar base)
-defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2_sret">;
-defm SVLD3 : StructLoad<"svld3[_{2}]", "3Pc", "aarch64_sve_ld3_sret">;
-defm SVLD4 : StructLoad<"svld4[_{2}]", "4Pc", "aarch64_sve_ld4_sret">;
+defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2_sret", [VerifyRuntimeMode]>;
+defm SVLD3 : StructLoad<"svld3[_{2}]", "3Pc", "aarch64_sve_ld3_sret", [VerifyRuntimeMode]>;
+defm SVLD4 : StructLoad<"svld4[_{2}]", "4Pc", "aarch64_sve_ld4_sret", [VerifyRuntimeMode]>;
// Load N-element structure into N vectors (scalar base, VL displacement)
-defm SVLD2_VNUM : StructLoad<"svld2_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2_sret">;
-defm SVLD3_VNUM : StructLoad<"svld3_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3_sret">;
-defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret">;
+defm SVLD2_VNUM : StructLoad<"svld2_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2_sret", [VerifyRuntimeMode]>;
+defm SVLD3_VNUM : StructLoad<"svld3_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3_sret", [VerifyRuntimeMode]>;
+defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret", [VerifyRuntimeMode]>;
// Load one octoword and replicate (scalar base)
let SVETargetGuard = "sve,f64mm", SMETargetGuard = InvalidMode in {
@@ -434,21 +434,21 @@ def SVST1H_SCATTER_INDEX_S : MInst<"svst1h_scatter[_{2}base]_index[_{d}]", "v
def SVST1W_SCATTER_INDEX_S : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "vPuld", "lUl", [IsScatterStore], MemEltTyInt32, "aarch64_sve_st1_scatter_scalar_offset">;
} // let SVETargetGuard = "sve"
-multiclass StructStore<string name, string proto, string i> {
- def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
+multiclass StructStore<string name, string proto, string i, list<FlagType> f = []> {
+ def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, !listconcat(f, [IsStructStore])>;
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
- def: SInst<name, proto, "b", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
+ def: SInst<name, proto, "b", MergeNone, i, !listconcat(f, [IsStructStore])>;
}
}
// Store N vectors into N-element structure (scalar base)
-defm SVST2 : StructStore<"svst2[_{d}]", "vPp2", "aarch64_sve_st2">;
-defm SVST3 : StructStore<"svst3[_{d}]", "vPp3", "aarch64_sve_st3">;
-defm SVST4 : StructStore<"svst4[_{d}]", "vPp4", "aarch64_sve_st4">;
+defm SVST2 : StructStore<"svst2[_{d}]", "vPp2", "aarch64_sve_st2", [VerifyRuntimeMode]>;
+defm SVST3 : StructStore<"svst3[_{d}]", "vPp3", "aarch64_sve_st3", [VerifyRuntimeMode]>;
+defm SVST4 : StructStore<"svst4[_{d}]", "vPp4", "aarch64_sve_st4", [VerifyRuntimeMode]>;
// Store N vectors into N-element structure (scalar base, VL displacement)
-defm SVST2_VNUM : StructStore<"svst2_vnum[_{d}]", "vPpl2", "aarch64_sve_st2">;
-defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3">;
-defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4">;
+defm SVST2_VNUM : StructStore<"svst2_vnum[_{d}]", "vPpl2", "aarch64_sve_st2", [VerifyRuntimeMode]>;
+defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3", [VerifyRuntimeMode]>;
+defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4", [VerifyRuntimeMode]>;
// Store one vector, with no truncation, non-temporal (scalar base)
def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">;
@@ -1325,7 +1325,7 @@ def SVCREATE_3_BF16 : SInst<"svcreate3[_{d}]", "3ddd", "b", MergeNone, "", [IsT
def SVCREATE_4_BF16 : SInst<"svcreate4[_{d}]", "4dddd", "b", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
def SVCREATE_2_B : SInst<"svcreate2[_b]", "2dd", "Pc", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
def SVCREATE_4_B : SInst<"svcreate4[_b]", "4dddd", "Pc", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
}
@@ -1350,18 +1350,17 @@ def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "", [IsTupleSet
def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
def SVGET_2_B : SInst<"svget2[_b]", "d2i", "Pc", MergeNone, "", [IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>;
def SVGET_4_B : SInst<"svget4[_b]", "d4i", "Pc", MergeNone, "", [IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>;
def SVSET_2_B : SInst<"svset2[_b]", "22id", "Pc", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>;
def SVSET_4_B : SInst<"svset4[_b]", "44id", "Pc", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>;
-}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
def SVUNDEF_2_B: Inst<"svundef2_b", "2", "Pc", MergeNone, "", [IsUndef, VerifyRuntimeMode], []>;
def SVUNDEF_4_B: Inst<"svundef4_b", "4", "Pc", MergeNone, "", [IsUndef, VerifyRuntimeMode], []>;
}
+
////////////////////////////////////////////////////////////////////////////////
// SVE2 WhileGE/GT
let SVETargetGuard = "sve2", SMETargetGuard = "sme" in {
@@ -1375,7 +1374,7 @@ def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PcPsPiPl", MergeNone, "
def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PcPsPiPl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
def SVWHILEGE_S64_X2 : SInst<"svwhilege_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege_x2", [VerifyRuntimeMode]>;
def SVWHILEGT_S64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt_x2", [VerifyRuntimeMode]>;
def SVWHILEHI_U64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilehi_x2", [VerifyRuntimeMode]>;
@@ -1384,7 +1383,6 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
def SVWHILELT_S64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt_x2", [VerifyRuntimeMode]>;
def SVWHILELO_U64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilelo_x2", [VerifyRuntimeMode]>;
def SVWHILELS_U64_X2 : SInst<"svwhilele_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilels_x2", [VerifyRuntimeMode]>;
-
}
////////////////////////////////////////////////////////////////////////////////
@@ -1999,7 +1997,7 @@ def SVBGRP : SInst<"svbgrp[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sv
def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_bgrp_x", [VerifyRuntimeMode]>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme" in {
+let SVETargetGuard = "sve2p1|sme", SMETargetGuard = "sve2p1|sme" in {
def SVPSEL_B : SInst<"svpsel_lane_b8", "PPPm", "Pc", MergeNone, "", [VerifyRuntimeMode], []>;
def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [VerifyRuntimeMode], []>;
def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [VerifyRuntimeMode], []>;
@@ -2093,16 +2091,18 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
defm STNT1 : MultiVecStore<"stnt1">;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
def SVDOT_X2_S : SInst<"svdot[_{d}_{2}]", "ddhh", "i", MergeNone, "aarch64_sve_sdot_x2", [VerifyRuntimeMode], []>;
def SVDOT_X2_U : SInst<"svdot[_{d}_{2}]", "ddhh", "Ui", MergeNone, "aarch64_sve_udot_x2", [VerifyRuntimeMode], []>;
def SVDOT_X2_F : SInst<"svdot[_{d}_{2}]", "ddhh", "f", MergeNone, "aarch64_sve_fdot_x2", [VerifyRuntimeMode], []>;
def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_3>]>;
def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_3>]>;
def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_3>]>;
+
+def SVFCLAMP : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [VerifyRuntimeMode], []>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme" in {
+let SVETargetGuard = "sve2p1|sme", SMETargetGuard = "sve2p1|sme" in {
def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp", [VerifyRuntimeMode], []>;
def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [VerifyRuntimeMode], []>;
@@ -2114,7 +2114,6 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone, VerifyRuntimeMode]>;
- def SVFCLAMP : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [VerifyRuntimeMode], []>;
def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
}
@@ -2271,7 +2270,9 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,faminmax" in {
let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
def REINTERPRET_SVBOOL_TO_SVCOUNT : Inst<"svreinterpret[_c]", "}P", "Pc", MergeNone, "", [VerifyRuntimeMode], []>;
def REINTERPRET_SVCOUNT_TO_SVBOOL : Inst<"svreinterpret[_b]", "P}", "Pc", MergeNone, "", [VerifyRuntimeMode], []>;
+}
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
// SQRSHRN / UQRSHRN
def SVQRSHRN_X2 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "h2i", "i", MergeNone, "aarch64_sve_sqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>;
def SVUQRSHRN_X2 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "e2i", "Ui", MergeNone, "aarch64_sve_uqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>;
@@ -2383,7 +2384,7 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
//
// Multi-vector saturating extract narrow and interleave
//
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
def SVQCVTN_S16_S32_X2 : SInst<"svqcvtn_s16[_{d}_x2]", "h2.d", "i", MergeNone, "aarch64_sve_sqcvtn_x2", [VerifyRuntimeMode], []>;
def SVQCVTN_U16_U32_X2 : SInst<"svqcvtn_u16[_{d}_x2]", "e2.d", "Ui", MergeNone, "aarch64_sve_uqcvtn_x2", [VerifyRuntimeMode], []>;
def SVQCVTN_U16_S32_X2 : SInst<"svqcvtn_u16[_{d}_x2]", "e2.d", "i", MergeNone, "aarch64_sve_sqcvtun_x2", [VerifyRuntimeMode], []>;
@@ -2448,7 +2449,7 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in {
def SVFCVTN_X4 : Inst<"svcvtn_mf8[_{d}_x4]", "~4>", "f", MergeNone, "aarch64_sve_fp8_cvtn_x4", [IsOverloadNone, IsStreaming], []>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
// == BFloat16 multiply-subtract ==
def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone, VerifyRuntimeMode], []>;
def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone, VerifyRuntimeMode], []>;
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 76318a068ce6a..bd603a925d15e 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -569,34 +569,39 @@ static bool checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
// * When compiling for SVE only, the caller must be in non-streaming mode.
// * When compiling for both SVE and SME, the caller can be in either mode.
if (BuiltinType == SemaARM::VerifyRuntimeMode) {
- llvm::StringMap<bool> CallerFeatureMapWithoutSVE;
- S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSVE, FD);
- CallerFeatureMapWithoutSVE["sve"] = false;
+ llvm::StringMap<bool> CallerFeatures;
+ S.Context.getFunctionFeatureMap(CallerFeatures, FD);
// Avoid emitting diagnostics for a function that can never compile.
- if (FnType == SemaARM::ArmStreaming && !CallerFeatureMapWithoutSVE["sme"])
+ if (FnType == SemaARM::ArmStreaming && !CallerFeatures["sme"])
return false;
- llvm::StringMap<bool> CallerFeatureMapWithoutSME;
- S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSME, FD);
- CallerFeatureMapWithoutSME["sme"] = false;
+ const auto FindTopLevelPipe = [](const char *S) {
+ unsigned Depth = 0;
+ unsigned I = 0, E = strlen(S);
+ for (; I < E; ++I) {
+ if (S[I] == '|' && Depth == 0)
+ break;
+ if (S[I] == '(')
+ ++Depth;
+ else if (S[I] == ')')
+ --Depth;
+ }
+ return I;
+ };
+
+ const char *RequiredFeatures =
+ S.Context.BuiltinInfo.getRequiredFeatures(BuiltinID);
+ unsigned PipeIdx = FindTopLevelPipe(RequiredFeatures);
+ assert(PipeIdx != 0 && PipeIdx != strlen(RequiredFeatures) &&
+ "Expected feature string of the form 'SVE-EXPR|SME-EXPR'");
+ StringRef NonStreamingBuiltinGuard = StringRef(RequiredFeatures, PipeIdx);
+ StringRef StreamingBuiltinGuard = StringRef(RequiredFeatures + PipeIdx + 1);
- // We know the builtin requires either some combination of SVE flags, or
- // some combination of SME flags, but we need to figure out which part
- // of the required features is satisfied by the target features.
- //
- // For a builtin with target guard 'sve2p1|sme2', if we compile with
- // '+sve2p1,+sme', then we know that it satisfies the 'sve2p1' part if we
- // evaluate the features for '+sve2p1,+sme,+nosme'.
- //
- // Similarly, if we compile with '+sve2,+sme2', then we know it satisfies
- // the 'sme2' part if we evaluate the features for '+sve2,+sme2,+nosve'.
- StringRef BuiltinTargetGuards(
- S.Context.BuiltinInfo.getRequiredFeatures(BuiltinID));
bool SatisfiesSVE = Builtin::evaluateRequiredTargetFeatures(
- BuiltinTargetGuards, CallerFeatureMapWithoutSME);
+ NonStreamingBuiltinGuard, CallerFeatures);
bool SatisfiesSME = Builtin::evaluateRequiredTargetFeatures(
- BuiltinTargetGuards, CallerFeatureMapWithoutSVE);
+ StreamingBuiltinGuard, CallerFeatures);
if ((SatisfiesSVE && SatisfiesSME) ||
(SatisfiesSVE && FnType == SemaARM::ArmStreamingCompatible))
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c
index d947cf78660ff..2143f27f95e45 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c
@@ -1,7 +1,9 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \
-// RUN: -target-feature +bf16 -target-feature +sme -target-feature +sme2 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: -target-feature +bf16 -target-feature +sme -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \
+// RUN: -target-feature +bf16 -target-feature +sve -target-feature +sme -O1 -Werror -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu \
// RUN: -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \
@@ -9,10 +11,12 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu \
// RUN: -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s
#include <arm_sve.h>
-#if defined __ARM_FEATURE_SME
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define MODE_ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
#define MODE_ATTR __arm_streaming
#else
#define MODE_ATTR
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmlsl.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
index 97c458fd01b7f..ad1128fd2b896 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
@@ -3,6 +3,8 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
@@ -12,7 +14,9 @@
#include <arm_sve.h>
-#ifdef __ARM_FEATURE_SME
+#if define...
[truncated]
|
@llvm/pr-subscribers-backend-arm Author: Paul Walker (paulwalker-arm) ChangesBuiltins that are enabled via +sve2p1 in non-streaming mode and +sme{2} in streaming mode should also be enabled via +sve+sme{2} in non-streaming mode and +sme+sve2p1 in streaming mode. Patch is 55.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147086.diff 21 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 76fd072a41d8b..ac6f89f9afdbc 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -264,22 +264,22 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
def SVLD1RQ_BF : SInst<"svld1rq[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1rq", [VerifyRuntimeMode]>;
}
-multiclass StructLoad<string name, string proto, string i> {
- def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, [IsStructLoad, VerifyRuntimeMode]>;
+multiclass StructLoad<string name, string proto, string i, list<FlagType> f = []> {
+ def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, !listconcat(f, [IsStructLoad])>;
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
- def: SInst<name, proto, "b", MergeNone, i, [IsStructLoad, VerifyRuntimeMode]>;
+ def: SInst<name, proto, "b", MergeNone, i, !listconcat(f, [IsStructLoad])>;
}
}
// Load N-element structure into N vectors (scalar base)
-defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2_sret">;
-defm SVLD3 : StructLoad<"svld3[_{2}]", "3Pc", "aarch64_sve_ld3_sret">;
-defm SVLD4 : StructLoad<"svld4[_{2}]", "4Pc", "aarch64_sve_ld4_sret">;
+defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2_sret", [VerifyRuntimeMode]>;
+defm SVLD3 : StructLoad<"svld3[_{2}]", "3Pc", "aarch64_sve_ld3_sret", [VerifyRuntimeMode]>;
+defm SVLD4 : StructLoad<"svld4[_{2}]", "4Pc", "aarch64_sve_ld4_sret", [VerifyRuntimeMode]>;
// Load N-element structure into N vectors (scalar base, VL displacement)
-defm SVLD2_VNUM : StructLoad<"svld2_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2_sret">;
-defm SVLD3_VNUM : StructLoad<"svld3_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3_sret">;
-defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret">;
+defm SVLD2_VNUM : StructLoad<"svld2_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2_sret", [VerifyRuntimeMode]>;
+defm SVLD3_VNUM : StructLoad<"svld3_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3_sret", [VerifyRuntimeMode]>;
+defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret", [VerifyRuntimeMode]>;
// Load one octoword and replicate (scalar base)
let SVETargetGuard = "sve,f64mm", SMETargetGuard = InvalidMode in {
@@ -434,21 +434,21 @@ def SVST1H_SCATTER_INDEX_S : MInst<"svst1h_scatter[_{2}base]_index[_{d}]", "v
def SVST1W_SCATTER_INDEX_S : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "vPuld", "lUl", [IsScatterStore], MemEltTyInt32, "aarch64_sve_st1_scatter_scalar_offset">;
} // let SVETargetGuard = "sve"
-multiclass StructStore<string name, string proto, string i> {
- def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
+multiclass StructStore<string name, string proto, string i, list<FlagType> f = []> {
+ def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, !listconcat(f, [IsStructStore])>;
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
- def: SInst<name, proto, "b", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
+ def: SInst<name, proto, "b", MergeNone, i, !listconcat(f, [IsStructStore])>;
}
}
// Store N vectors into N-element structure (scalar base)
-defm SVST2 : StructStore<"svst2[_{d}]", "vPp2", "aarch64_sve_st2">;
-defm SVST3 : StructStore<"svst3[_{d}]", "vPp3", "aarch64_sve_st3">;
-defm SVST4 : StructStore<"svst4[_{d}]", "vPp4", "aarch64_sve_st4">;
+defm SVST2 : StructStore<"svst2[_{d}]", "vPp2", "aarch64_sve_st2", [VerifyRuntimeMode]>;
+defm SVST3 : StructStore<"svst3[_{d}]", "vPp3", "aarch64_sve_st3", [VerifyRuntimeMode]>;
+defm SVST4 : StructStore<"svst4[_{d}]", "vPp4", "aarch64_sve_st4", [VerifyRuntimeMode]>;
// Store N vectors into N-element structure (scalar base, VL displacement)
-defm SVST2_VNUM : StructStore<"svst2_vnum[_{d}]", "vPpl2", "aarch64_sve_st2">;
-defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3">;
-defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4">;
+defm SVST2_VNUM : StructStore<"svst2_vnum[_{d}]", "vPpl2", "aarch64_sve_st2", [VerifyRuntimeMode]>;
+defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3", [VerifyRuntimeMode]>;
+defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4", [VerifyRuntimeMode]>;
// Store one vector, with no truncation, non-temporal (scalar base)
def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">;
@@ -1325,7 +1325,7 @@ def SVCREATE_3_BF16 : SInst<"svcreate3[_{d}]", "3ddd", "b", MergeNone, "", [IsT
def SVCREATE_4_BF16 : SInst<"svcreate4[_{d}]", "4dddd", "b", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
def SVCREATE_2_B : SInst<"svcreate2[_b]", "2dd", "Pc", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
def SVCREATE_4_B : SInst<"svcreate4[_b]", "4dddd", "Pc", MergeNone, "", [IsTupleCreate, VerifyRuntimeMode]>;
}
@@ -1350,18 +1350,17 @@ def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "", [IsTupleSet
def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
def SVGET_2_B : SInst<"svget2[_b]", "d2i", "Pc", MergeNone, "", [IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>;
def SVGET_4_B : SInst<"svget4[_b]", "d4i", "Pc", MergeNone, "", [IsTupleGet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>;
def SVSET_2_B : SInst<"svset2[_b]", "22id", "Pc", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>;
def SVSET_4_B : SInst<"svset4[_b]", "44id", "Pc", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>;
-}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
def SVUNDEF_2_B: Inst<"svundef2_b", "2", "Pc", MergeNone, "", [IsUndef, VerifyRuntimeMode], []>;
def SVUNDEF_4_B: Inst<"svundef4_b", "4", "Pc", MergeNone, "", [IsUndef, VerifyRuntimeMode], []>;
}
+
////////////////////////////////////////////////////////////////////////////////
// SVE2 WhileGE/GT
let SVETargetGuard = "sve2", SMETargetGuard = "sme" in {
@@ -1375,7 +1374,7 @@ def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PcPsPiPl", MergeNone, "
def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PcPsPiPl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
def SVWHILEGE_S64_X2 : SInst<"svwhilege_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege_x2", [VerifyRuntimeMode]>;
def SVWHILEGT_S64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt_x2", [VerifyRuntimeMode]>;
def SVWHILEHI_U64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilehi_x2", [VerifyRuntimeMode]>;
@@ -1384,7 +1383,6 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
def SVWHILELT_S64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt_x2", [VerifyRuntimeMode]>;
def SVWHILELO_U64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilelo_x2", [VerifyRuntimeMode]>;
def SVWHILELS_U64_X2 : SInst<"svwhilele_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilels_x2", [VerifyRuntimeMode]>;
-
}
////////////////////////////////////////////////////////////////////////////////
@@ -1999,7 +1997,7 @@ def SVBGRP : SInst<"svbgrp[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sv
def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_bgrp_x", [VerifyRuntimeMode]>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme" in {
+let SVETargetGuard = "sve2p1|sme", SMETargetGuard = "sve2p1|sme" in {
def SVPSEL_B : SInst<"svpsel_lane_b8", "PPPm", "Pc", MergeNone, "", [VerifyRuntimeMode], []>;
def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [VerifyRuntimeMode], []>;
def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [VerifyRuntimeMode], []>;
@@ -2093,16 +2091,18 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
defm STNT1 : MultiVecStore<"stnt1">;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
def SVDOT_X2_S : SInst<"svdot[_{d}_{2}]", "ddhh", "i", MergeNone, "aarch64_sve_sdot_x2", [VerifyRuntimeMode], []>;
def SVDOT_X2_U : SInst<"svdot[_{d}_{2}]", "ddhh", "Ui", MergeNone, "aarch64_sve_udot_x2", [VerifyRuntimeMode], []>;
def SVDOT_X2_F : SInst<"svdot[_{d}_{2}]", "ddhh", "f", MergeNone, "aarch64_sve_fdot_x2", [VerifyRuntimeMode], []>;
def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_3>]>;
def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_3>]>;
def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_3>]>;
+
+def SVFCLAMP : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [VerifyRuntimeMode], []>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme" in {
+let SVETargetGuard = "sve2p1|sme", SMETargetGuard = "sve2p1|sme" in {
def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp", [VerifyRuntimeMode], []>;
def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [VerifyRuntimeMode], []>;
@@ -2114,7 +2114,6 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone, VerifyRuntimeMode]>;
- def SVFCLAMP : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [VerifyRuntimeMode], []>;
def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
}
@@ -2271,7 +2270,9 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,faminmax" in {
let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
def REINTERPRET_SVBOOL_TO_SVCOUNT : Inst<"svreinterpret[_c]", "}P", "Pc", MergeNone, "", [VerifyRuntimeMode], []>;
def REINTERPRET_SVCOUNT_TO_SVBOOL : Inst<"svreinterpret[_b]", "P}", "Pc", MergeNone, "", [VerifyRuntimeMode], []>;
+}
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
// SQRSHRN / UQRSHRN
def SVQRSHRN_X2 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "h2i", "i", MergeNone, "aarch64_sve_sqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>;
def SVUQRSHRN_X2 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "e2i", "Ui", MergeNone, "aarch64_sve_uqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>;
@@ -2383,7 +2384,7 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
//
// Multi-vector saturating extract narrow and interleave
//
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
def SVQCVTN_S16_S32_X2 : SInst<"svqcvtn_s16[_{d}_x2]", "h2.d", "i", MergeNone, "aarch64_sve_sqcvtn_x2", [VerifyRuntimeMode], []>;
def SVQCVTN_U16_U32_X2 : SInst<"svqcvtn_u16[_{d}_x2]", "e2.d", "Ui", MergeNone, "aarch64_sve_uqcvtn_x2", [VerifyRuntimeMode], []>;
def SVQCVTN_U16_S32_X2 : SInst<"svqcvtn_u16[_{d}_x2]", "e2.d", "i", MergeNone, "aarch64_sve_sqcvtun_x2", [VerifyRuntimeMode], []>;
@@ -2448,7 +2449,7 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in {
def SVFCVTN_X4 : Inst<"svcvtn_mf8[_{d}_x4]", "~4>", "f", MergeNone, "aarch64_sve_fp8_cvtn_x4", [IsOverloadNone, IsStreaming], []>;
}
-let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
+let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in {
// == BFloat16 multiply-subtract ==
def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone, VerifyRuntimeMode], []>;
def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone, VerifyRuntimeMode], []>;
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 76318a068ce6a..bd603a925d15e 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -569,34 +569,39 @@ static bool checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
// * When compiling for SVE only, the caller must be in non-streaming mode.
// * When compiling for both SVE and SME, the caller can be in either mode.
if (BuiltinType == SemaARM::VerifyRuntimeMode) {
- llvm::StringMap<bool> CallerFeatureMapWithoutSVE;
- S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSVE, FD);
- CallerFeatureMapWithoutSVE["sve"] = false;
+ llvm::StringMap<bool> CallerFeatures;
+ S.Context.getFunctionFeatureMap(CallerFeatures, FD);
// Avoid emitting diagnostics for a function that can never compile.
- if (FnType == SemaARM::ArmStreaming && !CallerFeatureMapWithoutSVE["sme"])
+ if (FnType == SemaARM::ArmStreaming && !CallerFeatures["sme"])
return false;
- llvm::StringMap<bool> CallerFeatureMapWithoutSME;
- S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSME, FD);
- CallerFeatureMapWithoutSME["sme"] = false;
+ const auto FindTopLevelPipe = [](const char *S) {
+ unsigned Depth = 0;
+ unsigned I = 0, E = strlen(S);
+ for (; I < E; ++I) {
+ if (S[I] == '|' && Depth == 0)
+ break;
+ if (S[I] == '(')
+ ++Depth;
+ else if (S[I] == ')')
+ --Depth;
+ }
+ return I;
+ };
+
+ const char *RequiredFeatures =
+ S.Context.BuiltinInfo.getRequiredFeatures(BuiltinID);
+ unsigned PipeIdx = FindTopLevelPipe(RequiredFeatures);
+ assert(PipeIdx != 0 && PipeIdx != strlen(RequiredFeatures) &&
+ "Expected feature string of the form 'SVE-EXPR|SME-EXPR'");
+ StringRef NonStreamingBuiltinGuard = StringRef(RequiredFeatures, PipeIdx);
+ StringRef StreamingBuiltinGuard = StringRef(RequiredFeatures + PipeIdx + 1);
- // We know the builtin requires either some combination of SVE flags, or
- // some combination of SME flags, but we need to figure out which part
- // of the required features is satisfied by the target features.
- //
- // For a builtin with target guard 'sve2p1|sme2', if we compile with
- // '+sve2p1,+sme', then we know that it satisfies the 'sve2p1' part if we
- // evaluate the features for '+sve2p1,+sme,+nosme'.
- //
- // Similarly, if we compile with '+sve2,+sme2', then we know it satisfies
- // the 'sme2' part if we evaluate the features for '+sve2,+sme2,+nosve'.
- StringRef BuiltinTargetGuards(
- S.Context.BuiltinInfo.getRequiredFeatures(BuiltinID));
bool SatisfiesSVE = Builtin::evaluateRequiredTargetFeatures(
- BuiltinTargetGuards, CallerFeatureMapWithoutSME);
+ NonStreamingBuiltinGuard, CallerFeatures);
bool SatisfiesSME = Builtin::evaluateRequiredTargetFeatures(
- BuiltinTargetGuards, CallerFeatureMapWithoutSVE);
+ StreamingBuiltinGuard, CallerFeatures);
if ((SatisfiesSVE && SatisfiesSME) ||
(SatisfiesSVE && FnType == SemaARM::ArmStreamingCompatible))
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c
index d947cf78660ff..2143f27f95e45 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c
@@ -1,7 +1,9 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \
-// RUN: -target-feature +bf16 -target-feature +sme -target-feature +sme2 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: -target-feature +bf16 -target-feature +sme -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \
+// RUN: -target-feature +bf16 -target-feature +sve -target-feature +sme -O1 -Werror -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu \
// RUN: -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \
@@ -9,10 +11,12 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu \
// RUN: -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s
#include <arm_sve.h>
-#if defined __ARM_FEATURE_SME
+#if defined(__ARM_FEATURE_SME) && defined(__ARM_FEATURE_SVE)
+#define MODE_ATTR __arm_streaming_compatible
+#elif defined(__ARM_FEATURE_SME)
#define MODE_ATTR __arm_streaming
#else
#define MODE_ATTR
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmlsl.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
index 97c458fd01b7f..ad1128fd2b896 100644
--- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
@@ -3,6 +3,8 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve2 -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
@@ -12,7 +14,9 @@
#include <arm_sve.h>
-#ifdef __ARM_FEATURE_SME
+#if define...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good.
…SME{2}. Builtins that are enabled via +sve2p1 in non-streaming mode and +sme{2} in streaming mode should also be enabled via +sve+sme{2} in non-stremaing mode and +sme+sve2p1 in streaming mode.
438ac92
to
4efe257
Compare
Builtins that are enabled via +sve2p1 in non-streaming mode and +sme{2} in streaming mode should also be enabled via +sve+sme{2} in non-streaming mode and +sme+sve2p1 in streaming mode.
The PR's first commit is #145941 and should be ignored. I'll rebase when the dependent PR lands.
NOTE: I plan to follow this up with a bigger change to bring in the SVE and SVE2 builtins using the same scheme. This PR is focused on those builtins that cannot be enabled using existing feature flags. For example, whilst
+sve+sme2
will not enable the SVE2 builtins, that's easily fixed by using+sve2+sme2
which is likely the original intent. However, without this PR there is no way to enable builtins likeclamp
when SME2 is available but SVE2p1 is not.